library(tidyverse)
library(here)
library(janitor)
library(devtools)
# Install emo from GitHub only when it is not already available, so that
# re-running the script does not hit the network and reinstall every time.
if (!requireNamespace("emo", quietly = TRUE)) {
  devtools::install_github("hadley/emo")
}
library(emo)
Problem: survey data with many Likert-style factor variables that vary by question 🙄
Pattern: several of the factor variables have the same levels 🧐
Solution: change all those variables at once! 🤩
Note:
# Load the pre-survey export (path built with here()); suppressMessages()
# silences whatever messages read_csv() emits while parsing.
pre <- suppressMessages(
  read_csv(here("r_ladies_pre_data.csv"))
)

# One tabyl() frequency table per character column whose name contains
# "rye_" or "hk_", to see which answer labels each question actually uses.
pre %>%
  select_if(is.character) %>%
  select(
    matches("rye_"),
    matches("hk_")
  ) %>%
  map(~ tabyl(.))
## $rye_readscitext
## . n percent valid_percent
## Emerging 11 0.11 0.12643678
## Experienced 62 0.62 0.71264368
## Expert 12 0.12 0.13793103
## Very new 2 0.02 0.02298851
## <NA> 13 0.13 NA
##
## $rye_learnscivocab
## . n percent valid_percent
## Emerging 12 0.12 0.13793103
## Experienced 62 0.62 0.71264368
## Expert 11 0.11 0.12643678
## Very new 2 0.02 0.02298851
## <NA> 13 0.13 NA
##
## $rye_scitext
## . n percent valid_percent
## Emerging 26 0.26 0.29885057
## Experienced 49 0.49 0.56321839
## Expert 7 0.07 0.08045977
## Very new 5 0.05 0.05747126
## <NA> 13 0.13 NA
##
## $hk_ellparticipateinsci
## . n percent valid_percent
## Knowledgeable 60 0.60 0.68965517
## Not at all knowledgeable 2 0.02 0.02298851
## Not knowledgeable 8 0.08 0.09195402
## Very knowledgeable 17 0.17 0.19540230
## <NA> 13 0.13 NA
##
## $hk_swdparticipateinsci
## . n percent valid_percent
## Knowledgeable 59 0.59 0.67816092
## Not at all knowledgeable 3 0.03 0.03448276
## Not knowledgeable 11 0.11 0.12643678
## Very knowledgeable 14 0.14 0.16091954
## <NA> 13 0.13 NA
##
## $hk_teaching_science
## . n percent valid_percent
## Knowledgeable 57 0.57 0.6551724
## Not very knowledgeable 12 0.12 0.1379310
## Very knowledgeable 18 0.18 0.2068966
## <NA> 13 0.13 NA
##
## $hk_teaching_sci_field_trial_units
## . n percent valid_percent
## Knowledgeable 58 0.58 0.6666667
## Not knowledgeable 20 0.20 0.2298851
## Very knowledgeable 9 0.09 0.1034483
## <NA> 13 0.13 NA
# Answer labels in their intended order, lowest to highest.
rye_levels <- c("Very new", "Emerging", "Experienced", "Expert")
hk_levels <- c(
  "Not at all knowledgeable", "Not knowledgeable",
  "Knowledgeable", "Very knowledgeable"
)

# Convert every rye_* / hk_* column to a factor with the shared level order.
# hk_teaching_science words its second level as "Not very knowledgeable",
# which is not in hk_levels; factor() would silently turn those answers into
# NA. Fold that variant into "Not knowledgeable" first so no responses are lost.
pre_f <-
  pre %>%
  mutate_at(vars(contains("rye_")), ~ factor(., levels = rye_levels)) %>%
  mutate_at(
    vars(contains("hk_")),
    ~ factor(
      replace(., . %in% "Not very knowledgeable", "Not knowledgeable"),
      levels = hk_levels
    )
  )

# Re-tabulate the converted columns: levels now print in scale order, and the
# <NA> count of each question should match the count before conversion.
pre_f %>%
  select_if(is.factor) %>%
  select(contains("rye_"), contains("hk_")) %>%
  map(~ tabyl(.))
## $rye_readscitext
## . n percent valid_percent
## Very new 2 0.02 0.02298851
## Emerging 11 0.11 0.12643678
## Experienced 62 0.62 0.71264368
## Expert 12 0.12 0.13793103
## <NA> 13 0.13 NA
##
## $rye_learnscivocab
## . n percent valid_percent
## Very new 2 0.02 0.02298851
## Emerging 12 0.12 0.13793103
## Experienced 62 0.62 0.71264368
## Expert 11 0.11 0.12643678
## <NA> 13 0.13 NA
##
## $rye_scitext
## . n percent valid_percent
## Very new 5 0.05 0.05747126
## Emerging 26 0.26 0.29885057
## Experienced 49 0.49 0.56321839
## Expert 7 0.07 0.08045977
## <NA> 13 0.13 NA
##
## $hk_ellparticipateinsci
## . n percent valid_percent
## Not at all knowledgeable 2 0.02 0.02298851
## Not knowledgeable 8 0.08 0.09195402
## Knowledgeable 60 0.60 0.68965517
## Very knowledgeable 17 0.17 0.19540230
## <NA> 13 0.13 NA
##
## $hk_swdparticipateinsci
## . n percent valid_percent
## Not at all knowledgeable 3 0.03 0.03448276
## Not knowledgeable 11 0.11 0.12643678
## Knowledgeable 59 0.59 0.67816092
## Very knowledgeable 14 0.14 0.16091954
## <NA> 13 0.13 NA
##
## $hk_teaching_science
## . n percent valid_percent
## Not at all knowledgeable 0 0.00 0.0000000
## Not knowledgeable 12 0.12 0.1379310
## Knowledgeable 57 0.57 0.6551724
## Very knowledgeable 18 0.18 0.2068966
## <NA> 13 0.13 NA
##
## $hk_teaching_sci_field_trial_units
## . n percent valid_percent
## Not at all knowledgeable 0 0.00 0.0000000
## Not knowledgeable 20 0.20 0.2298851
## Knowledgeable 58 0.58 0.6666667
## Very knowledgeable 9 0.09 0.1034483
## <NA> 13 0.13 NA
🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 BUT WAIT…
Here, things get interesting! The dear people who wrote this survey decided to phrase the levels of the same type of question in different ways.
# Load the post-survey export (path built with here()); suppressMessages()
# silences whatever messages read_csv() emits while parsing.
post <- suppressMessages(
  read_csv(here("post_data.csv"))
)

# One tabyl() frequency table per column whose name contains "well_", to
# compare how each question words its answer levels.
post %>%
  select(matches("well_")) %>%
  map(~ tabyl(.))
## $well_meet_ell
## . n percent valid_percent
## Not very well 20 0.10582011 0.12121212
## Not well at all 8 0.04232804 0.04848485
## Very Well 47 0.24867725 0.28484848
## Well 90 0.47619048 0.54545455
## <NA> 24 0.12698413 NA
##
## $well_meet_sped
## . n percent valid_percent
## Not very well 30 0.15873016 0.17543860
## Not well at all 7 0.03703704 0.04093567
## Very Well 30 0.15873016 0.17543860
## Well 104 0.55026455 0.60818713
## <NA> 18 0.09523810 NA
##
## $well_meet_low
## . n percent valid_percent
## Not very well 28 0.14814815 0.15555556
## Not well at all 3 0.01587302 0.01666667
## Very Well 34 0.17989418 0.18888889
## Well 115 0.60846561 0.63888889
## <NA> 9 0.04761905 NA
##
## $well_meet_high
## . n percent valid_percent
## Not very well 5 0.026455026 0.027472527
## Not well at all 1 0.005291005 0.005494505
## Very Well 135 0.714285714 0.741758242
## Well 41 0.216931217 0.225274725
## <NA> 7 0.037037037 NA
##
## $well_teacher_guide
## . n percent valid_percent
## Just okay 26 0.13756614 0.14364641
## Not at all well 6 0.03174603 0.03314917
## Very well 70 0.37037037 0.38674033
## Well 79 0.41798942 0.43646409
## <NA> 8 0.04232804 NA
##
## $well_s_reaction
## . n percent valid_percent
## Just okay 15 0.07936508 0.08333333
## Not at all well 3 0.01587302 0.01666667
## Very well 97 0.51322751 0.53888889
## Well 65 0.34391534 0.36111111
## <NA> 9 0.04761905 NA
##
## $well_design_proposal
## . n percent valid_percent
## Not so well 4 0.02116402 0.1538462
## Not well at all 4 0.02116402 0.1538462
## Very well 6 0.03174603 0.2307692
## Well 12 0.06349206 0.4615385
## <NA> 163 0.86243386 NA
# Lowercase every "well_" column so that labels differing only in case
# (e.g. "Very Well" vs "Very well") become the same string.
post_f <- mutate_at(post, vars(contains("well_")), tolower)
Then gsub them all away, turning each label into its 1–4 score!
# Regex -> score pairs for the lowercased answer labels. They are applied in
# the order 4, 1, 2, 3, 3w, and that order matters:
#   * "very.*" runs first, so "not very well" becomes "not 4", which the
#     "not 4" pattern then maps to 2;
#   * "well" runs last, so it only hits labels no earlier pattern consumed.
pattern_4 <- "very.*"
pattern_1 <- ".*at all.*"
pattern_2 <- "not 4"
pattern_3 <- "somewhat.*"
pattern_3w <- "well"

replacement_4 <- 4
replacement_1 <- 1
replacement_2 <- 2
replacement_3 <- 3
replacement_3w <- 3

# NOTE(review): the substitutions run on every column whose name does not
# contain "_comment", not only the well_* ones, and gsub() returns character
# vectors -- confirm that rewriting the other columns is intended.
post_f <-
  post_f %>%
  mutate_at(
    vars(-contains("_comment")),
    function(answers) {
      answers <- gsub(pattern_4, replacement_4, answers)
      answers <- gsub(pattern_1, replacement_1, answers)
      answers <- gsub(pattern_2, replacement_2, answers)
      answers <- gsub(pattern_3, replacement_3, answers)
      gsub(pattern_3w, replacement_3w, answers)
    }
  ) %>%
  # Labels the patterns cannot express: "just okay" is scored 2 ...
  mutate_at(
    vars(well_teacher_guide, well_s_reaction),
    ~ ifelse(. == "just okay", "2", .)
  ) %>%
  # ... and "not so well", which the "well" pass left as "not so 3", is also 2.
  mutate(
    well_design_proposal =
      ifelse(well_design_proposal == "not so 3", "2", well_design_proposal)
  )
# Tabulate the recoded "well_" columns to confirm that only the scores
# "1" to "4" (plus NA) remain.
post_f %>%
  select(matches("well_")) %>%
  map(~ tabyl(.))
## $well_meet_ell
## . n percent valid_percent
## 1 8 0.04232804 0.04848485
## 2 20 0.10582011 0.12121212
## 3 90 0.47619048 0.54545455
## 4 47 0.24867725 0.28484848
## <NA> 24 0.12698413 NA
##
## $well_meet_sped
## . n percent valid_percent
## 1 7 0.03703704 0.04093567
## 2 30 0.15873016 0.17543860
## 3 104 0.55026455 0.60818713
## 4 30 0.15873016 0.17543860
## <NA> 18 0.09523810 NA
##
## $well_meet_low
## . n percent valid_percent
## 1 3 0.01587302 0.01666667
## 2 28 0.14814815 0.15555556
## 3 115 0.60846561 0.63888889
## 4 34 0.17989418 0.18888889
## <NA> 9 0.04761905 NA
##
## $well_meet_high
## . n percent valid_percent
## 1 1 0.005291005 0.005494505
## 2 5 0.026455026 0.027472527
## 3 41 0.216931217 0.225274725
## 4 135 0.714285714 0.741758242
## <NA> 7 0.037037037 NA
##
## $well_teacher_guide
## . n percent valid_percent
## 1 6 0.03174603 0.03314917
## 2 26 0.13756614 0.14364641
## 3 79 0.41798942 0.43646409
## 4 70 0.37037037 0.38674033
## <NA> 8 0.04232804 NA
##
## $well_s_reaction
## . n percent valid_percent
## 1 3 0.01587302 0.01666667
## 2 15 0.07936508 0.08333333
## 3 65 0.34391534 0.36111111
## 4 97 0.51322751 0.53888889
## <NA> 9 0.04761905 NA
##
## $well_design_proposal
## . n percent valid_percent
## 1 4 0.02116402 0.1538462
## 2 4 0.02116402 0.1538462
## 3 12 0.06349206 0.4615385
## 4 6 0.03174603 0.2307692
## <NA> 163 0.86243386 NA
🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉 🎉